# Start from an empty workspace so objects left over from an earlier session
# cannot leak into the results.
rm(list=ls(all=TRUE))
library(dplyr)
library(foreign)
library(MCMCpack)
library(taRifx)
# All paths below are relative, so run the script with the project root (the
# directory that contains "data/") as the working directory. The bare
# `setwd()` call that used to be here is removed: setwd() has no default for
# its `dir` argument, so the call stopped the script with an error before any
# data was read. Uncomment and adapt the next line if needed.
# setwd("path/to/project")

# Fixed seed so that the simulated elections below are reproducible.
set.seed(220915)

# 2005 inputs (semicolon-separated, with header row): the board list (first
# 4 columns), the candidate list (first 13 columns) and the coalitions file.
board.2005 <- read.table("data/raw.2005.boards.csv", sep = ";", header = TRUE)[, 1:4]
raw.2005 <- read.table("data/raw.2005.cand.csv", sep = ";", header = TRUE)[, 1:13]
coalitions2005 <- read.table("data/coalitions05.csv", sep = ";", header = TRUE)

## Keep only the text before the first comma of each value. This removes the
## location information appended to candidate names and reduces the party
## field of the board list to its list letter. Vectorised, so it also works
## on empty input (the former `for (i in 1:length(x))` loops did not) and it
## no longer creates a variable called `names` that shadows base::names().
first_field <- function(x) {
  vapply(strsplit(as.character(x), ","), function(parts) parts[1],
         character(1), USE.NAMES = FALSE)
}

## Columns that identify a candidate within one election.
key_cols <- c("muncpr", "candname", "candpart")

## Drop every row whose key occurs more than once (all copies are removed,
## not only the later ones), because such rows cannot be matched uniquely.
drop_ambiguous <- function(df) {
  keys <- df[, key_cols]
  df[!(duplicated(keys) | duplicated(keys, fromLast = TRUE)), ]
}

## Board list: clean candidate name and party.
board.2005$candname <- first_field(board.2005$candname)
board.2005$candpart <- first_field(board.2005$candpart)

## Candidate list: clean candidate name only.
raw.2005$candname <- first_field(raw.2005$candname)

## candmano is dropped from the candidate list; after the merge below the
## column of that name comes from the board list instead.
raw.2005$candmano <- NULL

## Two individuals in Ringkøbing-Skjern have the same name and run for the
## same party in 2005. Since neither of them is in the 2001 or 2009 data, both
## are deleted. The same applies to two individuals in Faxe in 2005.
raw.2005 <- drop_ambiguous(raw.2005)
board.2005 <- drop_ambiguous(board.2005)

## Attach the board information to the candidates and keep only rows that
## exist in the candidate list (rows with a non-missing candvote).
raw.2005 <- merge(raw.2005, board.2005, by = key_cols, all = TRUE)
raw.2005 <- raw.2005[!is.na(raw.2005$candvote), ]

## Diagnostic: flag board rows that found a candidate (5th column is 1) and
## display the board rows that did not.
board.2005 <- left_join(board.2005, cbind(raw.2005[, key_cols], 1), by = key_cols)
board.2005[is.na(board.2005[, 5]), ]

##add dummy for election year
raw.2005$year <- 2005
rm(first_field, key_cols, drop_ambiguous)

## Elected indicator: 1 when candmano is positive, 0 otherwise. Candidates
## with a missing candmano (no match on the board list) count as not elected.
raw.2005$elected <- as.numeric(raw.2005$candmano > 0)
raw.2005$elected[is.na(raw.2005$elected)] <- 0
rm(board.2005)

colnames(coalitions2005)[3] <- "coalitions"

## Merge the coalition id onto each candidate (by municipality and party).
raw.2005 <- left_join(raw.2005, coalitions2005, by = c("muncpr", "candpart"))

## Offset for the fallback ids below: number of distinct values in
## `coalitions` minus one (presumably the minus one accounts for NA being
## counted as a distinct value -- TODO confirm).
maxblocks <- length(unique(raw.2005$coalitions)) - 1
muncprs <- unique(raw.2005$muncpr)

## Give every party a numeric id above the offset (0 when the party is NA).
## match() replaces the former loop over `1:length(parties)`.
parties <- unique(raw.2005$candpart)
raw.2005$partynum <- ifelse(is.na(raw.2005$candpart),
                            0,
                            match(raw.2005$candpart, parties) + maxblocks)

## Parties without a coalition form their own one-party "coalition", using
## the party id. The column is referenced by its full name `coalitions`; the
## previous `$coalition` only worked through partial matching of `$`.
raw.2005$coalitions[is.na(raw.2005$coalitions)] <-
  raw.2005$partynum[is.na(raw.2005$coalitions)]

# Alternative distribution of seats. Subset and run one municipality at a time.
# For every municipality the loop (1) simulates `size` sets of party votes,
# (2) splits each simulated party total among the party's candidates,
# (3) allocates the municipality's seats to coalitions, then to parties inside
# each coalition, then to candidates inside each party, and (4) stores one
# data frame per municipality in `munfiles`, holding a 0/1 seat indicator per
# candidate and simulation in columns 3:(size + 2).

munfiles <- list()
# Number of simulated elections per municipality.
size <- 10000
for ( r in 1:length(muncprs)){
  i <- muncprs[r]
  subdata <- raw.2005[raw.2005$muncpr == i,]
  
  # Simulate votes for party
  
  # Mean of ptotvote per party (party label in Group.1, value in x).
  # Presumably ptotvote is the party total repeated on each candidate row --
  # TODO confirm.
  pvotes <- aggregate(subdata[,c("ptotvote")], by = list(subdata$candpart), FUN = mean)
  
  # If the party totals do not add up to mtotvavo[1], the difference is added
  # as a residual category "other". rbind() with a character vector turns x
  # into character, hence the as.numeric() below.
  if (sum(pvotes$x)  != subdata$mtotvavo[1]) 
    pvotes <- rbind(pvotes,c("other",(subdata$mtotvavo[1]-sum(pvotes$x))))
  
  pvotes$x <- as.numeric(pvotes$x)
  
  # `size` replications: draw sum(pvotes$x) party labels with replacement,
  # with weights pvotes$x, and tabulate them. The result is later indexed as
  # [party, simulation], which relies on replicate() returning a matrix.
  # NOTE(review): that requires every label to show up in every draw.
  simPartyVotes <- replicate(table(expr = sample(x      = pvotes$Group.1, 
                                                 size    = sum(pvotes$x),
                                                 replace = TRUE, 
                                                 prob    = pvotes$x )),
                             n = size)
  
  cpart <- unique(subdata$candpart)
  
  simCandVotes <- NULL
  
  # Split each simulated party total among the party's candidates.
  for (l in 1:length(cpart)){
    subsub <- subdata[subdata$candpart==cpart[l],]
    # NOTE(review): candnames is not used anywhere else in this loop.
    candnames <- data.frame(subsub$candname)
    sim <- NULL
    for (j in 1:size ){
      # Draw the party's simulated votes among its candidates plus the extra
      # category 99, weighted by candvote and listvote[1] respectively. Every
      # category is appended once before tabulating and 1 is subtracted
      # afterwards, so categories that drew no vote still get a count of 0.
      t <-  table(c(sample(x       = c(subsub$candname, 99),
                           size    = simPartyVotes[cpart[l], j],
                           replace = TRUE,
                           prob    = c(subsub$candvote, subsub$listvote[1])),
                    subsub$candname, 99 ) ) - rep(1, nrow(subsub) + 1)
      # One column per simulation; rows are named by category.
      sim <- cbind(sim, t)
      }

    # Prefix the party label and stack the parties on top of each other.
    simCandVotes <- rbind(simCandVotes, cbind(cpart[l], sim))
  }

  # Turn the stacked matrix into a data frame with candname (taken from the
  # row names) and candpart as the first two columns, drop the 99 rows and
  # any "other" party, and attach candidate metadata from subdata.
  suppressWarnings({
    simCandVotes <- data.frame(cbind(rownames(simCandVotes), simCandVotes))
    colnames(simCandVotes)[1:2] <- c("candname","candpart")
    simCandVotes <- 
      simCandVotes %>%
      filter(candname != "99" & candpart != "other")
    
    votesSim   <- left_join(simCandVotes, 
                            subdata[,c("candpart", 
                                       "candlino", 
                                       "candname",
                                       "openlist",
                                       "coalitions",
                                       "ptotvote",
                                       "candvote")], 
                            by=c("candpart","candname"))
  })
  #allocating seats to coalitions
  # Seats to distribute = number of elected candidates in the municipality.
  seats      <- sum(subdata$elected) 
  # Coalition id per party (Group.1 = party, x = mean of coalitions).
  coalitions <- aggregate(subdata$coalitions, by=list(subdata$candpart),FUN=mean)
  simPartyVotes <- data.frame(simPartyVotes)
  simPartyVotes$Group.1 <- rownames(simPartyVotes)
  # Columns: party, coalition id, then one column per simulation.
  simPartyVotesEl <- left_join(coalitions, simPartyVotes, by = "Group.1")
  # Simulated votes summed by coalition; column k + 1 belongs to simulation k.
  coalitAl  <- aggregate(simPartyVotesEl[,-c(1:2)],  by=list(simPartyVotesEl[,2]),FUN=sum)

  
  for (k in 1:size){
    # Quotients votes / m for m = 1..seats (rows = coalitions); the `seats`
    # largest quotients each earn a seat.
    divMat   <- array(NA,c(nrow(coalitAl),seats))
    for( m in 1:seats){
      divMat[,m] <- coalitAl[,(k+1)]/m
    }
    
    # NOTE(review): %in% marks every quotient equal to a winning value, so a
    # tie at the cut-off can hand out more than `seats` seats.
    divWin <- sort(divMat, decreasing = TRUE)[1:seats]
    divMat <- matrix(ifelse(divMat %in% divWin,1,0),nrow(divMat),ncol(divMat))
    
    # The vote column of simulation k is overwritten with the seats won.
    coalitAl[,k+1] <- rowSums(divMat)
    
  }
  
  ##allocating seats to parties 
  coalind <- unique(simPartyVotesEl[,2])
  
  # One row per simulation, one column per party: seats won by the party.
  seatsData <- data.frame(array(NA,c(size,length(cpart))))
  colnames(seatsData) <- cpart
  
   for (l in coalind){
    # Rows of the parties that belong to coalition l.
    partyVotes <- simPartyVotesEl[l == simPartyVotesEl[, 2], ]
    for (k in 1:size){
      # Seats won by coalition l in simulation k.
      cSeats <- coalitAl[coalitAl$Group.1==l,(k+1)] 
      if (cSeats==0) seatsData[k, coalitions[coalitions[, 2] == l, 1]] <- 0 else{
        # Same quotient rule as above, now among the coalition's parties.
        divMat   <- array(NA, c(length(coalitions[coalitions[, 2] == l, 1]), cSeats))
        for( m in 1:cSeats){
          # NOTE(review): partyVotes is a row subset of a data frame, so the
          # is.vector() branches here and below look unreachable -- confirm.
          if (is.vector(partyVotes)) 
            divMat[m] <- partyVotes[(k+2)]/m else{
              divMat[,m] <- partyVotes[,(k+2)]/m
            }
        }
      
        divWin <- sort(divMat, decreasing = TRUE)[1:cSeats]
        divMat <- matrix(ifelse(divMat %in% divWin,1,0),nrow(divMat),ncol(divMat))
      
        if (is.vector(partyVotes)) partyVotes[k+2] <- sum(divMat) else {
          partyVotes[,k+2] <- rowSums(divMat)
        }
    
    
    if (is.vector(partyVotes)) 
      seatsData[k,coalitions[coalitions[,2]==l,1]] <- partyVotes[k+2] else{
        seatsData[k,coalitions[coalitions[,2]==l,1]] <- t(partyVotes[,k+2])
      }
      }
    }
  }

  # Convert the simulated vote columns (3 onwards) to numeric with destring().
  for(p in 1:size) {
    votesSim[,p+2] <- destring(votesSim[,p+2])
  }
  
  ##Allocate seats
  seatsSim <- votesSim
  
  for (l in 1:length(cpart)){
    for (k in 1:size) {
      # Seats of party l in simulation k go to its candidates with the most
      # simulated votes; seatsSim gets 1 for a seat and 0 otherwise.
      iterseats <- seatsData[k,cpart[l]]
      winvotes  <- sort(votesSim[votesSim$candpart==cpart[l],(k+2)],decreasing=TRUE)[1:iterseats]
      # With no seats, 1:0 would still pick the top value above, so winvotes
      # is replaced by the placeholder 99999.
      if (iterseats == 0) winvotes <- 99999
      # NOTE(review): matching on vote values means candidates tied with the
      # last winner all receive a seat.
      seatsSim[votesSim$candpart==cpart[l],(k+2)]  <- 
        ifelse(destring(votesSim[votesSim$candpart==cpart[l],(k+2)]) %in% winvotes, 1, 0)
    }
  }
  munfiles[[r]] <- seatsSim
  # Progress output: code of the municipality just finished.
  print(i)
}

## Collapse the simulations to one score per candidate: the share of the
## `size` simulated elections in which the candidate won a seat. Columns
## 3:(size + 2) of each element of munfiles hold the 0/1 seat indicators; the
## remaining columns are candidate metadata.
##
## Every municipality, including the first, is labelled with its own code from
## muncprs (the first one used to be hard-coded as 101). The columns are named
## directly instead of by position, the pieces are bound in one call rather
## than grown with rbind() in a loop, and a single municipality no longer
## breaks the iteration (`2:length(muncprs)` counted backwards in that case).
sim_cols <- 3:(size + 2)
simScores <- do.call(rbind, lapply(seq_along(muncprs), function(r) {
  cbind(muncpr   = muncprs[r],
        munfiles[[r]][, -sim_cols],
        simscore = rowMeans(munfiles[[r]][, sim_cols]))
}))
rm(sim_cols)

## Missing splitlist values are treated as 0.
raw.2005$splitlist[is.na(raw.2005$splitlist)] <- 0

## Attach the simulation score to each candidate.
raw.2005 <- left_join(raw.2005,
                      simScores[, c("muncpr", "candpart", "candname", "simscore")],
                      by = c("muncpr", "candpart", "candname"))

##Create thresholds

## Party-level thresholds on the simulation score.
##
## Arguments (parallel vectors, one entry per candidate):
##   mun      - municipality code
##   party    - party label
##   simscore - simulation score
##   elected  - 1 for elected candidates, 0 otherwise
##
## Returns a data frame with one row per municipality/party that has at least
## one elected candidate:
##   muncpr, candpart2 - municipality and party
##   marg.win          - lowest score among the party's elected candidates
##   marg.los          - highest score among its non-elected candidates
##                       (NA when the party has none)
##   marg.thres        - midpoint between marg.los and marg.win
pthres <- function (mun,party,simscore,elected) {
  groups <- list(party, mun, elected)

  # Lowest score among the elected, highest score among the non-elected.
  lowest  <- aggregate(simscore, groups, FUN = min)
  highest <- aggregate(simscore, groups, FUN = max)
  winners <- lowest[lowest$Group.3 == 1, ]
  losers  <- highest[highest$Group.3 == 0, ]

  # Group.2 is the municipality, Group.1 the party; x.x comes from the
  # winners and x.y from the losers.
  margins <- merge(winners, losers, by = c("Group.2", "Group.1"), all = TRUE)

  # Keep only parties that have an elected candidate.
  margins <- margins[!is.na(margins$x.x), ]

  margins$threshold <- (margins$x.x - margins$x.y) / 2 + margins$x.y

  margins <- margins[, c("Group.2", "Group.1", "x.x", "x.y", "threshold")]
  colnames(margins) <- c("muncpr", "candpart2", "marg.win", "marg.los", "marg.thres")
  margins
}

## Compute the thresholds per municipality and party and attach them to the
## candidates. candpart2 is a copy of candpart that serves as join key.
out <- pthres(raw.2005$muncpr, raw.2005$candpart, raw.2005$simscore, raw.2005$elected)
raw.2005$candpart2 <- raw.2005$candpart
## `all.x = TRUE` is an argument of merge(), not of left_join(); it was removed
## because a left join keeps all rows of the left table anyway and the stray
## argument is at best ignored (recent dplyr versions may reject it).
raw.2005 <- left_join(raw.2005, out, by = c("muncpr", "candpart2"))
## Distance between the candidate's simulation score and the party threshold.
raw.2005$margsim <- raw.2005$simscore - raw.2005$marg.thres

###################
###################
### repeat for 2009 
###################
###################

## 2009 inputs (semicolon-separated): the board list (first 4 columns), the
## candidate list (first 13 columns) and the coalitions file, which is read
## without a header row and therefore gets its column names assigned here.
board.2009 <- read.table("data/raw.2009.boards.csv", sep = ";", header = TRUE)[, 1:4]
raw.2009 <- read.table("data/raw.2009.cand.csv", sep = ";", header = TRUE)[, 1:13]
coalitions2009 <- read.table("data/coalitions09_2.txt", sep = ";")
colnames(coalitions2009) <- c("muncpr", "candpart", "coalition")

## Keep only the text before the first comma of each value. This removes the
## location information appended to candidate names and reduces the party
## field of the board list to its list letter. Vectorised, so it also works
## on empty input (the former `for (i in 1:length(x))` loops did not) and it
## no longer creates a variable called `names` that shadows base::names().
first_field <- function(x) {
  vapply(strsplit(as.character(x), ","), function(parts) parts[1],
         character(1), USE.NAMES = FALSE)
}

## Columns that identify a candidate within one election.
key_cols <- c("muncpr", "candname", "candpart")

## Drop every row whose key occurs more than once (all copies are removed,
## not only the later ones), because such rows cannot be matched uniquely.
drop_ambiguous <- function(df) {
  keys <- df[, key_cols]
  df[!(duplicated(keys) | duplicated(keys, fromLast = TRUE)), ]
}

## Board list: clean candidate name and party.
board.2009$candname <- first_field(board.2009$candname)
board.2009$candpart <- first_field(board.2009$candpart)

## Candidate list: clean candidate name only.
raw.2009$candname <- first_field(raw.2009$candname)

## candmano is dropped from the candidate list; after the merge below the
## column of that name comes from the board list instead.
raw.2009$candmano <- NULL

raw.2009 <- drop_ambiguous(raw.2009)
board.2009 <- drop_ambiguous(board.2009)

## Attach the board information to the candidates and keep only rows that
## exist in the candidate list (rows with a non-missing candvote).
raw.2009 <- merge(raw.2009, board.2009, by = key_cols, all = TRUE)
raw.2009 <- raw.2009[!is.na(raw.2009$candvote), ]

## Diagnostic: flag board rows that found a candidate (5th column is 1) and
## display the board rows that did not.
board.2009 <- left_join(board.2009, cbind(raw.2009[, key_cols], 1), by = key_cols)
board.2009[is.na(board.2009[, 5]), ]

##add dummy for election year
raw.2009$year <- 2009
rm(first_field, key_cols, drop_ambiguous)

## Elected indicator: 1 when candmano is positive, 0 otherwise. Candidates
## with a missing candmano (no match on the board list) count as not elected.
raw.2009$elected <- as.numeric(raw.2009$candmano > 0)
raw.2009$elected[is.na(raw.2009$elected)] <- 0
rm(board.2009)

colnames(coalitions2009)[3] <- "coalitions"

## Merge the coalition id onto each candidate (by municipality and party).
raw.2009 <- left_join(raw.2009, coalitions2009, by = c("muncpr", "candpart"))

## Offset for the fallback ids below: number of distinct values in
## `coalitions` minus one (presumably the minus one accounts for NA being
## counted as a distinct value -- TODO confirm).
maxblocks <- length(unique(raw.2009$coalitions)) - 1
muncprs <- unique(raw.2009$muncpr)

## Give every party a numeric id above the offset (0 when the party is NA).
## match() replaces the former loop over `1:length(parties)`.
parties <- unique(raw.2009$candpart)
raw.2009$partynum <- ifelse(is.na(raw.2009$candpart),
                            0,
                            match(raw.2009$candpart, parties) + maxblocks)

## Parties without a coalition form their own one-party "coalition", using
## the party id. The column is referenced by its full name `coalitions`; the
## previous `$coalition` only worked through partial matching of `$`.
raw.2009$coalitions[is.na(raw.2009$coalitions)] <-
  raw.2009$partynum[is.na(raw.2009$coalitions)]


# Alternative distribution of seats. Subset and run one municipality at a time.
# For every municipality the loop (1) simulates `size` sets of party votes,
# (2) splits each simulated party total among the party's candidates,
# (3) allocates the municipality's seats to coalitions, then to parties inside
# each coalition, then to candidates inside each party, and (4) stores one
# data frame per municipality in `munfiles`, holding a 0/1 seat indicator per
# candidate and simulation in columns 3:(size + 2).
# `size` is not assigned in this section; it must already exist when the loop
# starts.

munfiles <- list()

for ( r in 1:length(muncprs)){
  i <- muncprs[r]
  subdata <- raw.2009[raw.2009$muncpr == i,]
  
  # Simulate votes for party
  
  # Mean of ptotvote per party (party label in Group.1, value in x).
  # Presumably ptotvote is the party total repeated on each candidate row --
  # TODO confirm.
  pvotes <- aggregate(subdata[,c("ptotvote")], by = list(subdata$candpart), FUN = mean)
  
  # If the party totals do not add up to mtotvavo[1], the difference is added
  # as a residual category "other". rbind() with a character vector turns x
  # into character, hence the as.numeric() below.
  if (sum(pvotes$x)  != subdata$mtotvavo[1]) 
    pvotes <- rbind(pvotes,c("other",(subdata$mtotvavo[1]-sum(pvotes$x))))
  
  pvotes$x <- as.numeric(pvotes$x)
  
  # `size` replications: draw sum(pvotes$x) party labels with replacement,
  # with weights pvotes$x, and tabulate them. The result is later indexed as
  # [party, simulation], which relies on replicate() returning a matrix.
  # NOTE(review): that requires every label to show up in every draw.
  simPartyVotes <- replicate(table(expr = sample(x      = pvotes$Group.1, 
                                                 size    = sum(pvotes$x),
                                                 replace = TRUE, 
                                                 prob    = pvotes$x )),
                             n = size)
  
  cpart <- unique(subdata$candpart)
  
  simCandVotes <- NULL
  
  # Split each simulated party total among the party's candidates.
  for (l in 1:length(cpart)){
    subsub <- subdata[subdata$candpart==cpart[l],]
    # NOTE(review): candnames is not used anywhere else in this loop.
    candnames <- data.frame(subsub$candname)
    sim <- NULL
    for (j in 1:size ){
      # Draw the party's simulated votes among its candidates plus the extra
      # category 99, weighted by candvote and listvote[1] respectively. Every
      # category is appended once before tabulating and 1 is subtracted
      # afterwards, so categories that drew no vote still get a count of 0.
      t <-  table(c(sample(x       = c(subsub$candname, 99),
                           size    = simPartyVotes[cpart[l], j],
                           replace = TRUE,
                           prob    = c(subsub$candvote, subsub$listvote[1])),
                    subsub$candname, 99 ) ) - rep(1, nrow(subsub) + 1)
      # One column per simulation; rows are named by category.
      sim <- cbind(sim, t)
    }
    
    # Prefix the party label and stack the parties on top of each other.
    simCandVotes <- rbind(simCandVotes, cbind(cpart[l], sim))
  }
  
  # Turn the stacked matrix into a data frame with candname (taken from the
  # row names) and candpart as the first two columns, drop the 99 rows and
  # any "other" party, and attach candidate metadata from subdata.
  suppressWarnings({
    simCandVotes <- data.frame(cbind(rownames(simCandVotes), simCandVotes))
    colnames(simCandVotes)[1:2] <- c("candname","candpart")
    simCandVotes <- 
      simCandVotes %>%
      filter(candname != "99" & candpart != "other")
    
    votesSim   <- left_join(simCandVotes, 
                            subdata[,c("candpart", 
                                       "candlino", 
                                       "candname",
                                       "openlist",
                                       "coalitions",
                                       "ptotvote",
                                       "candvote")], 
                            by=c("candpart","candname"))
  })
  #allocating seats to coalitions
  # Seats to distribute = number of elected candidates in the municipality.
  seats      <- sum(subdata$elected) 
  # Coalition id per party (Group.1 = party, x = mean of coalitions).
  coalitions <- aggregate(subdata$coalitions, by=list(subdata$candpart),FUN=mean)
  simPartyVotes <- data.frame(simPartyVotes)
  simPartyVotes$Group.1 <- rownames(simPartyVotes)
  # Columns: party, coalition id, then one column per simulation.
  simPartyVotesEl <- left_join(coalitions, simPartyVotes, by = "Group.1")
  # Simulated votes summed by coalition; column k + 1 belongs to simulation k.
  coalitAl  <- aggregate(simPartyVotesEl[,-c(1:2)],  by=list(simPartyVotesEl[,2]),FUN=sum)
  
  
  for (k in 1:size){
    # Quotients votes / m for m = 1..seats (rows = coalitions); the `seats`
    # largest quotients each earn a seat.
    divMat   <- array(NA,c(nrow(coalitAl),seats))
    for( m in 1:seats){
      divMat[,m] <- coalitAl[,(k+1)]/m
    }
    
    # NOTE(review): %in% marks every quotient equal to a winning value, so a
    # tie at the cut-off can hand out more than `seats` seats.
    divWin <- sort(divMat, decreasing = TRUE)[1:seats]
    divMat <- matrix(ifelse(divMat %in% divWin,1,0),nrow(divMat),ncol(divMat))
    
    # The vote column of simulation k is overwritten with the seats won.
    coalitAl[,k+1] <- rowSums(divMat)
    
  }
  
  ##allocating seats to parties 
  coalind <- unique(simPartyVotesEl[,2])
  
  # One row per simulation, one column per party: seats won by the party.
  seatsData <- data.frame(array(NA,c(size,length(cpart))))
  colnames(seatsData) <- cpart
  
  for (l in coalind){
    # Rows of the parties that belong to coalition l.
    partyVotes <- simPartyVotesEl[l == simPartyVotesEl[, 2], ]
    for (k in 1:size){
      # Seats won by coalition l in simulation k.
      cSeats <- coalitAl[coalitAl$Group.1==l,(k+1)] 
      if (cSeats==0) seatsData[k, coalitions[coalitions[, 2] == l, 1]] <- 0 else{
        # Same quotient rule as above, now among the coalition's parties.
        divMat   <- array(NA, c(length(coalitions[coalitions[, 2] == l, 1]), cSeats))
        for( m in 1:cSeats){
          # NOTE(review): partyVotes is a row subset of a data frame, so the
          # is.vector() branches here and below look unreachable -- confirm.
          if (is.vector(partyVotes)) 
            divMat[m] <- partyVotes[(k+2)]/m else{
              divMat[,m] <- partyVotes[,(k+2)]/m
            }
        }
        
        divWin <- sort(divMat, decreasing = TRUE)[1:cSeats]
        divMat <- matrix(ifelse(divMat %in% divWin,1,0),nrow(divMat),ncol(divMat))
        
        if (is.vector(partyVotes)) partyVotes[k+2] <- sum(divMat) else {
          partyVotes[,k+2] <- rowSums(divMat)
        }
        
        
        if (is.vector(partyVotes)) 
          seatsData[k,coalitions[coalitions[,2]==l,1]] <- partyVotes[k+2] else{
            seatsData[k,coalitions[coalitions[,2]==l,1]] <- t(partyVotes[,k+2])
          }
      }
    }
  }
  
  # Convert the simulated vote columns (3 onwards) to numeric with destring().
  for(p in 1:size) {
    votesSim[,p+2] <- destring(votesSim[,p+2])
  }
  
  ##Allocate seats
  seatsSim <- votesSim
  
  for (l in 1:length(cpart)){
    for (k in 1:size) {
      # Seats of party l in simulation k go to its candidates with the most
      # simulated votes; seatsSim gets 1 for a seat and 0 otherwise.
      iterseats <- seatsData[k,cpart[l]]
      winvotes  <- sort(votesSim[votesSim$candpart==cpart[l],(k+2)],decreasing=TRUE)[1:iterseats]
      # With no seats, 1:0 would still pick the top value above, so winvotes
      # is replaced by the placeholder 99999.
      if (iterseats == 0) winvotes <- 99999
      # NOTE(review): matching on vote values means candidates tied with the
      # last winner all receive a seat.
      seatsSim[votesSim$candpart==cpart[l],(k+2)]  <- 
        ifelse(destring(votesSim[votesSim$candpart==cpart[l],(k+2)]) %in% winvotes, 1, 0)
    }
  }
  munfiles[[r]] <- seatsSim
  # Progress output: code of the municipality just finished.
  print(i)
}

## Collapse the simulations to one score per candidate: the share of the
## `size` simulated elections in which the candidate won a seat. Columns
## 3:(size + 2) of each element of munfiles hold the 0/1 seat indicators; the
## remaining columns are candidate metadata.
##
## Every municipality, including the first, is labelled with its own code from
## muncprs (the first one used to be hard-coded as 101). The columns are named
## directly instead of by position, the pieces are bound in one call rather
## than grown with rbind() in a loop, and a single municipality no longer
## breaks the iteration (`2:length(muncprs)` counted backwards in that case).
sim_cols <- 3:(size + 2)
simScores <- do.call(rbind, lapply(seq_along(muncprs), function(r) {
  cbind(muncpr   = muncprs[r],
        munfiles[[r]][, -sim_cols],
        simscore = rowMeans(munfiles[[r]][, sim_cols]))
}))
rm(sim_cols)

## Missing splitlist values are treated as 0.
raw.2009$splitlist[is.na(raw.2009$splitlist)] <- 0

## Attach the simulation score to each candidate.
raw.2009 <- left_join(raw.2009,
                      simScores[, c("muncpr", "candpart", "candname", "simscore")],
                      by = c("muncpr", "candpart", "candname"))

##Create thresholds

## Compute the thresholds per municipality and party and attach them to the
## candidates. candpart2 is a copy of candpart that serves as join key.
out <- pthres(raw.2009$muncpr, raw.2009$candpart, raw.2009$simscore, raw.2009$elected)
raw.2009$candpart2 <- raw.2009$candpart
## `all.x = TRUE` is an argument of merge(), not of left_join(); it was removed
## because a left join keeps all rows of the left table anyway and the stray
## argument is at best ignored (recent dplyr versions may reject it).
raw.2009 <- left_join(raw.2009, out, by = c("muncpr", "candpart2"))
## Distance between the candidate's simulation score and the party threshold.
raw.2009$margsim <- raw.2009$simscore - raw.2009$marg.thres


## Stack the two election years into one data frame.
data2 <- raw.2005 %>%
  rbind(raw.2009) %>%
  data.frame()

## Load the data set that holds electedt1 and rerun (the file provides an
## object called `data`) and keep only the merge keys and those two variables.
load("data/data.rdata")

data <- data[c("muncpr", "candpart", "candname", "year", "electedt1", "rerun")]

## Merge them onto the simulated data by municipality, party, name and year.
data_alt <- data2 %>%
  left_join(data, by = c("muncpr", "candpart", "candname", "year"))

save(data_alt, file = "data/data_alt_boot.rdata")
